In [204]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [205]:
def plot_special_dates():
    # http://blog.startupdigest.com/2017/03/09/history-major-bitcoin-crashes/
    _ = plt.axvline(x='2013-11', alpha=.2, c='g', label='2013-11 - 1K')
    
    # mt. gox ...
    _ = plt.axvline(x='2014-02', alpha=.2, c='r', label='2014-02 - mt. gox (hacked)')
    _ = plt.axvline(x='2014-04', alpha=.2, c='r', label='2014-04 - mt. gox (liquidation)')
    
    # hack
    _ = plt.axvline(x='2016-08', alpha=.2, c='k', label='2016-08 - hack')
    
    # chinese influence
    _ = plt.axvline(x='2017-01', alpha=.2, c='b', label='2017-01 - chinese mull restrictions')
    
    # price highs ...
    _ = plt.axvline(x='2017-05', alpha=.2, c='g', label='2017-05 - 2K')
    _ = plt.axvline(x='2017-09', alpha=.2, c='g', label='2017-09 - 5K')
    _ = plt.axvline(x='2017-11', alpha=.2, c='g', label='2017-11 - 10K')

In [206]:
scores_df = pd.read_csv('./data/reddit/_all/scores.csv', index_col='author', header=0, low_memory=False)

In [207]:
keys = scores_df.columns[0:]

Top 5 (degr centr) for entire network, per month

  • see "Reddit - Network Degree Analysis.ipynb"

In [208]:
# top degree centrality for entire graph.
top_dc_authors = [
    ('rydan', 0.8739361315196043),
    ('ebaley', 0.7552526611866831),
    ('Explodicle', 0.6387029897432167),
    ('Natanael_L', 0.6086236997163017),
    ('Introshine', 0.5352349603549865)
]

plt.clf()
plt.figure(figsize=(18, 8))

for top_dc_author in range(0,len(top_dc_authors)):
    author = top_dc_authors[top_dc_author][0]
    dc_scores = scores_df.loc[author].fillna("{'degc': 0}").map(lambda x: float(eval(x)['degc']))
    dc_scores = [float('nan') if x==0 else x for x in dc_scores]
    
    _ = plt.plot(keys[:-1], dc_scores[:-1], marker='.', linestyle='none', label=author, markersize=20, alpha=.7)

plot_special_dates()

plt.ylabel('degree centrality')

plt.title('degree centrality by month for top 5')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1ea01ec7710>

Top 5 (eig) for entire network, per month

  • see "Reddit - Network Degree Analysis.ipynb"

In [209]:
top_eigs_authors = [
    ('skereMan', 0.0943111926317215),
    ('BluSyn', 0.046781279146671295),
    ('Myceilingfan', 0.041636910289525986),
    ('DeanMaverick', 0.039750922471284866),
    ('0xDDDD', 0.033229414373636246)
]

plt.clf()
plt.figure(figsize=(18, 8))

for top_eigs_author in range(0,len(top_eigs_authors)):
    author = top_eigs_authors[top_eigs_author][0]
    eigs_scores = scores_df.loc[author].fillna("{'eigs': -1}").map(lambda x: float(eval(x)['eigs']))
    eigs_scores = [float('nan') if x==-1 else x for x in eigs_scores]
    
    _ = plt.plot(keys[:-1], eigs_scores[:-1], marker='.', linestyle='none', label=author, markersize=20, alpha=.7)

plot_special_dates()

plt.ylabel('eigenvector centrality')

plt.title('eigenvector centrality by month for top 5')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c7cc38d0>

nodes per month


In [210]:
nodes_per_month = []
path = r'./data/reddit/_nodes/'
for key in keys:
    npm = len(pd.read_csv(path + key + '_nodes.csv'))
    nodes_per_month.append(npm)

In [211]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('number of nodes')

plt.plot(keys[:-1], nodes_per_month[:-1], marker='.', markersize=10)
plot_special_dates()

plt.title('nodes per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c8d211d0>

In [212]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('log of number of nodes')

plt.plot(keys[:-1], np.log(nodes_per_month[:-1]), marker='.', markersize=10)
plot_special_dates()

plt.title('log of nodes per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c8b0ac50>

edges per month


In [213]:
edges_per_month = []
path = r'./data/reddit/_edges/'
for key in keys:
    npm = len(pd.read_csv(path + key + '_edgelist.csv'))
    edges_per_month.append(npm)

In [214]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('log of number of edges')

plt.plot(keys[:-1], edges_per_month[:-1], marker='.', markersize=10)
plot_special_dates()

plt.title('edges per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1ea6a02c5f8>

In [215]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('log of number of edges')

plt.plot(keys[:-1], np.log(edges_per_month[:-1]), marker='.', markersize=10)
plot_special_dates()

plt.title('log of edges per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


C:\Users\dmpas\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: RuntimeWarning: divide by zero encountered in log
  
<matplotlib.figure.Figure at 0x1e9c6903550>

degree centrality


In [216]:
dc_max_ = []
dc_min_ = []
dc_mean_ = []
dc_median_ = []

for key in keys:
    dc_scores = scores_df[key].fillna("{'degc': 0}").map(lambda x: float(eval(x)['degc']))
    dc_scores = np.array([x for x in dc_scores if not x==0])
                         
    dc_max_.append(dc_scores.max())
    dc_min_.append(dc_scores.min())
    dc_mean_.append(dc_scores.mean())
    dc_median_.append(np.median(dc_scores))

In [217]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('degree centrality')

plt.plot(keys[:-1], dc_max_[:-1], label='max', color='r', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], dc_min_[:-1], label='min', color='b', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], dc_mean_[:-1], label='mean', color='k', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], dc_median_[:-1], label='median', color='c', marker='.', markersize=10, alpha=.5)
plot_special_dates()

plt.title('degree centrality per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1ea400d8d30>

In [218]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('log of degree centrality')

plt.plot(keys[:-1], np.log(dc_max_[:-1]), label='max', color='r', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(dc_min_[:-1]), label='min', color='b', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(dc_mean_[:-1]), label='mean', color='k', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(dc_median_[:-1]), label='median', color='c', marker='.', markersize=10, alpha=.5)
plot_special_dates()

plt.title('log of degree centrality per month')
plt.legend(loc='lower left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c4af66d8>

eigenvector centrality


In [219]:
eigs_max_ = []
eigs_min_ = []
eigs_mean_ = []
eigs_median_ = []

for key in keys:
    eigs_scores = scores_df[key].fillna("{'eigs': 0}").map(lambda x: float(eval(x)['eigs']))
    eigs_scores = np.array([x for x in eigs_scores if not x==0])
                         
    eigs_max_.append(eigs_scores.max())
    eigs_min_.append(eigs_scores.min())
    eigs_mean_.append(eigs_scores.mean())
    eigs_median_.append(np.median(eigs_scores))

In [220]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('eigenvector centrality')

plt.plot(keys[:-1], eigs_max_[:-1], label='max', color='r', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], eigs_min_[:-1], label='min', color='b', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], eigs_mean_[:-1], label='mean', color='k', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], eigs_median_[:-1], label='median', color='c', marker='.', markersize=10, alpha=.5)
plot_special_dates()

plt.title('eigenvector centrality per month')
plt.legend(loc='upper right')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c7b882e8>

In [221]:
plt.clf()
plt.figure(figsize=(18, 8))

plt.ylabel('log of eigenvector centrality')

plt.plot(keys[:-1], np.log(eigs_max_[:-1]), label='max', color='r', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(eigs_min_[:-1]), label='min', color='b', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(eigs_mean_[:-1]), label='mean', color='k', marker='.', markersize=10, alpha=.5)
plt.plot(keys[:-1], np.log(eigs_median_[:-1]), label='median', color='c', marker='.', markersize=10, alpha=.5)
plot_special_dates()

plt.title('log of eigenvector centrality per month')
plt.legend(loc='lower left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()


<matplotlib.figure.Figure at 0x1e9c68fccf8>

In [ ]: